import pprint
import requests     # 2.18.4
import json         # 2.0.9
import pandas as pd # 0.23.0


# Package list of the Swiss open data portal
packages = 'https://opendata.swiss/api/3/action/package_list'


# Make the HTTP request. The timeout keeps the script from hanging
# indefinitely if the portal does not answer.
response = requests.get(packages, timeout=30)

# Fail early on HTTP errors (4xx/5xx) instead of trying to parse an error page
response.raise_for_status()

# Use the json module to load CKAN's response into a dictionary
response_dict = json.loads(response.content)

# CKAN signals API-level failures through the 'success' flag. Raise explicitly
# instead of using assert, which is skipped when Python runs with -O.
if response_dict.get('success') is not True:
    raise RuntimeError('CKAN request was not successful: ' + packages)


datasets = response_dict['result']         # extract all the packages from the response
print(len(datasets))                       # print the total number of datasets

6868


# Show the last ten package names of the list (displayed as cell output in a notebook)
datasets[-10:]

['zuzuge-nach-jahr-quartier-geschlecht-altersgruppe-zivilstand-und-familienstellung-nachfuhrung-e',
 'zuzuge-nach-monat-stadtquartier-geschlecht-altersgruppe-und-herkunft-seit-20135',
 'zuzuge-nach-zuzugsort-stadtquartier-geschlecht-altersgruppe-und-zivilstand-seit-1993',
 'zuzuge-nach-zuzugsort-und-stadtquartier-seit-1993',
 'zuzuge-pers',
 'zvv-fahrplan-tram-und-bus',
 'zwangsnutzungen',
 'zweigstellen-der-musikschule-konservatorium-zurich-mkz',
 'zweiradabstellplatze-in-der-stadt-zurich2',
 'zweite-vornamen-neugeborener-madchen-und-knaben-mit-wohnsitz-in-der-stadt-zurich-seit-1993']


# Specify the package you are interested in:
package = 'bruttoinlandprodukt'


# Base url for package information. This is always the same.
base_url = 'https://opendata.swiss/api/3/action/package_show?id='

# Construct the url for the package of interest
package_information_url = base_url + package

# Make the HTTP request. The timeout keeps the script from hanging
# indefinitely if the portal does not answer.
package_information = requests.get(package_information_url, timeout=30)

# Fail early on HTTP errors (4xx/5xx), e.g. when the package id is unknown
package_information.raise_for_status()

# Use the json module to load CKAN's response into a dictionary
package_dict = json.loads(package_information.content)

# CKAN signals API-level failures through the 'success' flag. Raise explicitly
# instead of using assert, which is skipped when Python runs with -O.
if package_dict.get('success') is not True:
    raise RuntimeError('CKAN request was not successful: ' + package_information_url)
package_dict = package_dict['result']   # we only need the 'result' part from the dictionary

# pprint.pprint(package_dict)           # pretty print the package information to screen


# The first resource of the package describes where the data lives and how it is encoded
first_resource = package_dict['resources'][0]

# Download location of the data
data_url = first_resource['url']
print('Data url:     ' + data_url)

# Declared format of the data
data_format = first_resource['format']
print('Data format:  ' + data_format)

Data url:     https://github.com/StataBS/indikatoren/tree/master/data/4323.tsv
Data format:  TSV


# If data is hosted at GitHub, always download the raw data
github_prefix = 'https://github.com/'
if data_url.startswith(github_prefix):
    # The path after the host is <owner>/<repo>/<tree|blob>/<ref>/<file path>.
    # Only the third segment marks the web view, so remove exactly that one;
    # a blanket replace of 'tree/' would also damage paths such as 'subtree/'.
    path_parts = data_url[len(github_prefix):].split('/')
    if len(path_parts) > 2 and path_parts[2] in ('tree', 'blob'):
        del path_parts[2]
    data_url = 'https://raw.githubusercontent.com/' + '/'.join(path_parts)
print('Data url:     ' + data_url)

Data url:     https://raw.githubusercontent.com/StataBS/indikatoren/master/data/4323.tsv


# List of formats we work with in this exercise
csv = ['comma-separated-values', 'CSV', 'csv']
tsv = ['tab-separated-values', 'TSV', 'tsv']
xls = ['XLS']

# Compare case-insensitively so that spellings such as 'Csv' or 'xlsx' are recognised too
format_key = data_format.lower()

# Download the data to a Pandas DataFrame. Use separate function calls, depending on the format of the dataset.
if any(s.lower() in format_key for s in csv):     # pd.read_csv()
    df = pd.read_csv(data_url)
elif any(s.lower() in format_key for s in tsv):   # pd.read_csv() and specify the delimiter
    df = pd.read_csv(data_url, sep='\t')
elif any(s.lower() in format_key for s in xls):   # pd.read_excel()
    df = pd.read_excel(data_url)
else:
    # Stop here: carrying on without a DataFrame would only fail on the next
    # line with a confusing NameError for df.
    raise ValueError('Sorry, the data format is not supported for this exercise: ' + data_format)

# Print the first rows to the screen to inspect the dataset
df.head(5)


# Discard the empty 'DateTime' column, then index the rows by 'Jahr'
df = df.drop(columns='DateTime').set_index('Jahr')


# Use IPython's "magic" in Jupyter Notebook to directly show the plot on the screen.
# NOTE: the %-line is IPython syntax and only works inside IPython/Jupyter, not in plain Python.
%matplotlib inline
# DataFrame.plot() draws a line chart by default, with the index ('Jahr') on the x-axis
df.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x115c8ada0>

	Jahr	Bruttoinlandprodukt
0	1980	8219.2
1	1981	8754.3
2	1982	9459.6
3	1983	9879.3
4	1984	10619.4

Tutorial for retrieving data from the Swiss Open Data portal